First install the packages

install.packages("tidyverse")
trying URL 'https://cran.rstudio.com/bin/macosx/contrib/4.1/tidyverse_1.3.1.tgz'
Content type 'application/x-gzip' length 421072 bytes (411 KB)
==================================================
downloaded 411 KB

The downloaded binary packages are in
    /var/folders/lr/hrpr063x28jd67wnc08d040w0000gn/T//RtmpgikMqG/downloaded_packages
install.packages("nycflights13")
trying URL 'https://cran.rstudio.com/bin/macosx/contrib/4.1/nycflights13_1.0.2.tgz'
Content type 'application/x-gzip' length 4502373 bytes (4.3 MB)
==================================================
downloaded 4.3 MB

The downloaded binary packages are in
    /var/folders/lr/hrpr063x28jd67wnc08d040w0000gn/T//RtmpgikMqG/downloaded_packages

Now call the libraries

library(nycflights13)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────────────────────────────── tidyverse 1.3.1 ──
✓ ggplot2 3.3.5     ✓ purrr   0.3.4
✓ tibble  3.1.3     ✓ dplyr   1.0.7
✓ tidyr   1.1.3     ✓ stringr 1.4.0
✓ readr   2.0.1     ✓ forcats 0.5.1
── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

Using pipe (%>%) to simplify your code:

Now, suppose we are trying to generate a new tibble that contains all flights in July and adds a new column, reader = “John”.

flights 
(flights_July = filter(flights, month == 7))
(flights_July_ZY = mutate(flights_July, reader="John"))

Let’s use Pipe to simplify the code with three steps: 1. remove the name of each newly generated tibbles 2. remove the first argument of each operation verbs 3. add “%>%”

flights %>% 
  filter(month == 7) %>%
  mutate(reader="John")

Group Challenge: Investigate the relationship between the distance and average delay for each location in July? Hint: Use summarise, group_by, filter, arrange

flights %>% 
  filter(month == 7) %>% 
  group_by(dest) %>% 
  summarise(dist = mean(distance, na.rm= TRUE), delay = mean(arr_delay, na.rm = TRUE)) %>% 
  arrange(desc(delay))
NA

Apply your skills to address covid19 statistics

install.packages("covid19.analytics")
trying URL 'https://cran.rstudio.com/bin/macosx/contrib/4.1/covid19.analytics_2.1.tgz'
Content type 'application/x-gzip' length 3562127 bytes (3.4 MB)
==================================================
downloaded 3.4 MB

The downloaded binary packages are in
    /var/folders/lr/hrpr063x28jd67wnc08d040w0000gn/T//RtmpgikMqG/downloaded_packages
library(covid19.analytics)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
library(tidyverse)
# obtain time series data for "confirmed" cases
(covid19.confirmed.cases <- covid19.data("ts-confirmed"))
Data being read from JHU/CCSE repository
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv
Data retrieved on 2021-09-07 14:10:59 || Range of dates on data: 2020-01-22--2021-09-06 | Nbr of records: 279
-------------------------------------------------------------------------------- 
# First check the number of variables. A function that helps us explore the number of days/weeks between two dates. 
difftime("2021-9-6", "2020-1-22", units = "days")
Time difference of 592.9583 days

Individual excercise (3 mins)

# Find the top ten countries that have the largest number of confirmed cases on 2021-09-02 (recommend using pipe!)
# Hint-1: the following verbs are recommended: select, group_by, summarise, filter
# Hint-2: filter(rank( desc(X) )) <= 10 will help you identify the top ten values of X

covid19.confirmed.cases %>% 
  select(Country.Region, `2021-09-02`) %>% 
  group_by(Country.Region) %>% 
  summarise(case_sum_02 = sum(`2021-09-02`, na.rm = TRUE)) %>% 
  filter(rank(desc(case_sum_02)) <= 10 )
NA

Individual excercise (5 mins): how to define daily growth rate?

# Adapt the above code to find the top ten countries that have the highest daily growth rate of confirmed cases on 2021-09-02. 
#Hint-1: define a new variable, growth_rate, either within summarise() or using mutate()
#Hint-2: daily growth rate = (cases_day_2 - cases_day_1)/cases_day_2

covid19.confirmed.cases %>% 
  select(Country.Region, `2021-09-02`,`2021-09-01` ) %>% 
  group_by(Country.Region) %>% 
  summarise(case_sum_02 = sum(`2021-09-02`, na.rm = TRUE), case_sum_01 = sum(`2021-09-01`, na.rm = TRUE)) %>%
  mutate(growth_rate = (case_sum_02 - case_sum_01) / case_sum_01 ) %>% 
  filter(rank(desc(growth_rate)) <= 10 )
NA

Now let’s take a look at the US data!

(CV_US = covid19.US.data() )
Data being read from JHU/CCSE repository
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv
Data retrieved on 2021-09-09 13:25:06 || Range of dates on data: 2020-01-22--2021-09-08 | Nbr of records: 3342
-------------------------------------------------------------------------------- 
Data being read from JHU/CCSE repository
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 
Reading data from https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_US.csv
Data retrieved on 2021-09-09 13:25:08 || Range of dates on data: 2020-01-22--2021-09-08 | Nbr of records: 3342
-------------------------------------------------------------------------------- 

Use across() function to select a range of variables

# Operation by row
# Create a new Table, CV_US_Aug, that contains the by-state sum of daily confirmed cases from 2021-08-01 to 2021-08-31


(CV_US_Aug <- CV_US %>% 
    group_by(Province_State) %>% 
    summarise(across(`2021-08-01`:`2021-08-31`, sum))
)
NA

Individual excercise (3 mins): use rowSums() to sum by row.

# Find the monthly sum of confirmed cases for each US state in August, 2021. 
# Hint-1: rowSums(across(`date-1`:`date-2`), na.rm = TRUE) will return the by-row sum of values in a tibble. 

CV_US_Aug %>% 
    group_by(Province_State) %>%
    summarise( Sum_Aug_2021 = rowSums(across(`2021-08-01`:`2021-08-31`, na.rm = TRUE)))
# Your code goes here

Group challenge*: Identify the top ten states that have the highest mean growth rate of COVID cases in August, 2021.

Hint-1: across() allows arithmetic operations for a range of variables. Hint-2: rowMeans() will return the by-row average of values in a tibble

CV_US %>% 
   group_by(Province_State) %>%
   summarise( Sum_Aug_2021 = rowSums(across(`2021-08-01`:`2021-08-31`, na.rm = TRUE))
              )
`summarise()` has grouped output by 'Province_State'. You can override using the `.groups` argument.
   mutate()
Error in UseMethod("mutate") : 
  no applicable method for 'mutate' applied to an object of class "NULL"
LS0tCnRpdGxlOiAiVHJhbnNmb3JtLUFzc2VydCIKYXV0aG9yOiAiSm9obiBZYW5nIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIEZpcnN0IGluc3RhbGwgdGhlIHBhY2thZ2VzCmBgYHtyfQppbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQppbnN0YWxsLnBhY2thZ2VzKCJueWNmbGlnaHRzMTMiKQpgYGAKCiMgTm93IGNhbGwgdGhlIGxpYnJhcmllcwoKYGBgYHtyfQpsaWJyYXJ5KG55Y2ZsaWdodHMxMykKbGlicmFyeSh0aWR5dmVyc2UpCmBgYAoKVXNpbmcgcGlwZSAoJT4lKSB0byBzaW1wbGlmeSB5b3VyIGNvZGU6CgpOb3csIHN1cHBvc2Ugd2UgYXJlIHRyeWluZyB0byBnZW5lcmF0ZSBhIG5ldyB0aWJibGUgdGhhdCBjb250YWlucyBhbGwgZmxpZ2h0cyBpbiBKdWx5IGFuZCBhZGRzIGEgbmV3IGNvbHVtbiwgcmVhZGVyID0gIkpvaG4iLiAgIAoKYGBgYHtyfQpmbGlnaHRzIAooZmxpZ2h0c19KdWx5ID0gZmlsdGVyKGZsaWdodHMsIG1vbnRoID09IDcpKQooZmxpZ2h0c19KdWx5X1pZID0gbXV0YXRlKGZsaWdodHNfSnVseSwgcmVhZGVyPSJKb2huIikpCmBgYAoKTGV0J3MgdXNlIFBpcGUgdG8gc2ltcGxpZnkgdGhlIGNvZGUgd2l0aCB0aHJlZSBzdGVwczoKMS4gcmVtb3ZlIHRoZSBuYW1lIG9mIGVhY2ggbmV3bHkgZ2VuZXJhdGVkIHRpYmJsZXMKMi4gcmVtb3ZlIHRoZSBmaXJzdCBhcmd1bWVudCBvZiBlYWNoIG9wZXJhdGlvbiB2ZXJicwozLiBhZGQgIiU+JSIKCmBgYGB7cn0KZmxpZ2h0cyAlPiUgCiAgZmlsdGVyKG1vbnRoID09IDcpICU+JQogIG11dGF0ZShyZWFkZXI9IkpvaG4iKQpgYGAKCkdyb3VwIENoYWxsZW5nZTogSW52ZXN0aWdhdGUgdGhlIHJlbGF0aW9uc2hpcCBiZXR3ZWVuIHRoZSBkaXN0YW5jZSBhbmQgYXZlcmFnZSBkZWxheSBmb3IgZWFjaCBsb2NhdGlvbiBpbiBKdWx577yfCkhpbnQ6IFVzZSBzdW1tYXJpc2UsIGdyb3VwX2J5LCBmaWx0ZXIsIGFycmFuZ2UKCmBgYGB7cn0KZmxpZ2h0cyAlPiUgCiAgZmlsdGVyKG1vbnRoID09IDcpICU+JSAKICBncm91cF9ieShkZXN0KSAlPiUgCiAgc3VtbWFyaXNlKGRpc3QgPSBtZWFuKGRpc3RhbmNlLCBuYS5ybT0gVFJVRSksIGRlbGF5ID0gbWVhbihhcnJfZGVsYXksIG5hLnJtID0gVFJVRSkpICU+JSAKICBhcnJhbmdlKGRlc2MoZGVsYXkpKQogIApgYGAKCiMgQXBwbHkgeW91ciBza2lsbHMgdG8gYWRkcmVzcyBjb3ZpZDE5IHN0YXRpc3RpY3MKCmBgYGB7cn0KaW5zdGFsbC5wYWNrYWdlcygiY292aWQxOS5hbmFseXRpY3MiKQpgYGAKCmBgYGB7cn0KbGlicmFyeShjb3ZpZDE5LmFuYWx5dGljcykKbGlicmFyeSh0aWR5dmVyc2UpCmBgYAoKYGBgYHtyfQojIG9idGFpbiB0aW1lIHNlcmllcyBkYXRhIGZvciAiY29uZmlybWVkIiBjYXNlcwooY292aWQxOS5jb25maXJtZWQuY2FzZXMgPC0gY292aWQxOS5kYXRhKCJ0cy1jb25maXJtZWQiKSkKYGBgCgpgYGBge3J9CiMgRmlyc3QgY2hlY2sgdGhlIG51bWJlciBvZiB2YXJpYWJsZXMuIEEgZnVuY3Rpb24gdGhhdCBoZWxwcyB1cyBleHBsb3JlIHRoZSBudW1iZXIgb2YgZGF5cy93ZWVrcyBiZXR3ZWVuIHR3byBkYXRlcy4gCmRpZmZ0aW1lKCIyMDIxLTktNiIsICIyMDIwLTEtMjIiLCB1bml0cyA9ICJkYXlzIikKYGBgCgpJbmRpdmlkdWFsIGV4Y2VyY2lzZSAoMyBtaW5zKQoKYGBgYHtyfQojIEZpbmQgdGhlIHRvcCB0ZW4gY291bnRyaWVzIHRoYXQgaGF2ZSB0aGUgbGFyZ2VzdCBudW1iZXIgb2YgY29uZmlybWVkIGNhc2VzIG9uIDIwMjEtMDktMDIgKHJlY29tbWVuZCB1c2luZyBwaXBlISkKIyBIaW50LTE6IHRoZSBmb2xsb3dpbmcgdmVyYnMgYXJlIHJlY29tbWVuZGVkOiBzZWxlY3QsIGdyb3VwX2J5LCBzdW1tYXJpc2UsIGZpbHRlcgojIEhpbnQtMjogZmlsdGVyKHJhbmsoIGRlc2MoWCkgKSkgPD0gMTAgd2lsbCBoZWxwIHlvdSBpZGVudGlmeSB0aGUgdG9wIHRlbiB2YWx1ZXMgb2YgWAoKY292aWQxOS5jb25maXJtZWQuY2FzZXMgJT4lIAogIHNlbGVjdChDb3VudHJ5LlJlZ2lvbiwgYDIwMjEtMDktMDJgKSAlPiUgCiAgZ3JvdXBfYnkoQ291bnRyeS5SZWdpb24pICU+JSAKICBzdW1tYXJpc2UoY2FzZV9zdW1fMDIgPSBzdW0oYDIwMjEtMDktMDJgLCBuYS5ybSA9IFRSVUUpKSAlPiUgCiAgZmlsdGVyKHJhbmsoZGVzYyhjYXNlX3N1bV8wMikpIDw9IDEwICkKICAgIApgYGAKCkluZGl2aWR1YWwgZXhjZXJjaXNlICg1IG1pbnMpOiBob3cgdG8gZGVmaW5lIGRhaWx5IGdyb3d0aCByYXRlPwoKYGBgYHtyfQojIEFkYXB0IHRoZSBhYm92ZSBjb2RlIHRvIGZpbmQgdGhlIHRvcCB0ZW4gY291bnRyaWVzIHRoYXQgaGF2ZSB0aGUgaGlnaGVzdCBkYWlseSBncm93dGggcmF0ZSBvZiBjb25maXJtZWQgY2FzZXMgb24gMjAyMS0wOS0wMi4gCiNIaW50LTE6IGRlZmluZSBhIG5ldyB2YXJpYWJsZSwgZ3Jvd3RoX3JhdGUsIGVpdGhlciB3aXRoaW4gc3VtbWFyaXNlKCkgb3IgdXNpbmcgbXV0YXRlKCkKI0hpbnQtMjogZGFpbHkgZ3Jvd3RoIHJhdGUgPSAoY2FzZXNfZGF5XzIgLSBjYXNlc19kYXlfMSkvY2FzZXNfZGF5XzIKCmNvdmlkMTkuY29uZmlybWVkLmNhc2VzICU+JSAKICBzZWxlY3QoQ291bnRyeS5SZWdpb24sIGAyMDIxLTA5LTAyYCxgMjAyMS0wOS0wMWAgKSAlPiUgCiAgZ3JvdXBfYnkoQ291bnRyeS5SZWdpb24pICU+JSAKICBzdW1tYXJpc2UoY2FzZV9zdW1fMDIgPSBzdW0oYDIwMjEtMDktMDJgLCBuYS5ybSA9IFRSVUUpLCBjYXNlX3N1bV8wMSA9IHN1bShgMjAyMS0wOS0wMWAsIG5hLnJtID0gVFJVRSkpICU+JQogIG11dGF0ZShncm93dGhfcmF0ZSA9IChjYXNlX3N1bV8wMiAtIGNhc2Vfc3VtXzAxKSAvIGNhc2Vfc3VtXzAxICkgJT4lIAogIGZpbHRlcihyYW5rKGRlc2MoZ3Jvd3RoX3JhdGUpKSA8PSAxMCApCgpgYGAKCiMgTm93IGxldCdzIHRha2UgYSBsb29rIGF0IHRoZSBVUyBkYXRhIQoKYGBgYHtyfQooQ1ZfVVMgPSBjb3ZpZDE5LlVTLmRhdGEoKSApCmBgYAoKVXNlIGFjcm9zcygpIGZ1bmN0aW9uIHRvIHNlbGVjdCBhIHJhbmdlIG9mIHZhcmlhYmxlcwoKYGBgYHtyfQojIE9wZXJhdGlvbiBieSByb3cKIyBDcmVhdGUgYSBuZXcgVGFibGUsIENWX1VTX0F1ZywgdGhhdCBjb250YWlucyB0aGUgYnktc3RhdGUgc3VtIG9mIGRhaWx5IGNvbmZpcm1lZCBjYXNlcyBmcm9tIDIwMjEtMDgtMDEgdG8gMjAyMS0wOC0zMQoKCihDVl9VU19BdWcgPC0gQ1ZfVVMgJT4lIAogICAgZ3JvdXBfYnkoUHJvdmluY2VfU3RhdGUpICU+JSAKICAgIHN1bW1hcmlzZShhY3Jvc3MoYDIwMjEtMDgtMDFgOmAyMDIxLTA4LTMxYCwgc3VtKSkKKQoKYGBgCgpJbmRpdmlkdWFsIGV4Y2VyY2lzZSAoMyBtaW5zKTogdXNlIHJvd1N1bXMoKSB0byBzdW0gYnkgcm93LgoKYGBgYHtyfQojIEZpbmQgdGhlIG1vbnRobHkgc3VtIG9mIGNvbmZpcm1lZCBjYXNlcyBmb3IgZWFjaCBVUyBzdGF0ZSBpbiBBdWd1c3QsIDIwMjEuIAojIEhpbnQtMTogcm93U3VtcyhhY3Jvc3MoYGRhdGUtMWA6YGRhdGUtMmApLCBuYS5ybSA9IFRSVUUpIHdpbGwgcmV0dXJuIHRoZSBieS1yb3cgc3VtIG9mIHZhbHVlcyBpbiBhIHRpYmJsZS4gCgpDVl9VU19BdWcgJT4lIAogICAgZ3JvdXBfYnkoUHJvdmluY2VfU3RhdGUpICU+JQogICAgc3VtbWFyaXNlKCBTdW1fQXVnXzIwMjEgPSByb3dTdW1zKGFjcm9zcyhgMjAyMS0wOC0wMWA6YDIwMjEtMDgtMzFgLCBuYS5ybSA9IFRSVUUpKSkKIyBZb3VyIGNvZGUgZ29lcyBoZXJlCmBgYAoKR3JvdXAgY2hhbGxlbmdlKjogSWRlbnRpZnkgdGhlIHRvcCB0ZW4gc3RhdGVzIHRoYXQgaGF2ZSB0aGUgaGlnaGVzdCBtZWFuIGdyb3d0aCByYXRlIG9mIENPVklEIGNhc2VzIGluIEF1Z3VzdCwgMjAyMS4gIAoKSGludC0xOiBhY3Jvc3MoKSBhbGxvd3MgYXJpdGhtZXRpYyBvcGVyYXRpb25zIGZvciBhIHJhbmdlIG9mIHZhcmlhYmxlcy4gCkhpbnQtMjogcm93TWVhbnMoKSB3aWxsIHJldHVybiB0aGUgYnktcm93IGF2ZXJhZ2Ugb2YgdmFsdWVzIGluIGEgdGliYmxlCgpgYGBge3J9CkNWX1VTICU+JSAKICAgZ3JvdXBfYnkoUHJvdmluY2VfU3RhdGUpICU+JQogICBzdW1tYXJpc2UoIFN1bV9BdWdfMjAyMSA9IHJvd1N1bXMoYWNyb3NzKGAyMDIxLTA4LTAxYDpgMjAyMS0wOC0zMWAsIG5hLnJtID0gVFJVRSkpCiAgICAgICAgICAgICAgKQogICBtdXRhdGUoIC0oYWNyb3NzYDIwMjEtMDctMzFgOmAyMDIxLTA4LTMwYCwgc3VtKS8oYWNyb3NzYDIwMjEtMDctMzFgOmAyMDIxLTA4LTMwYCwgc3VtKSkKYGBgCgoK